####################################################################################
#
# Replication file for: Ingram, Matthew C. 2016. "Mandates, Geography, and Networks: 
#     Diffusion of Criminal Procedure Reform in Mexico"
#     Latin American Politics and Society 58: 121-145.
#     DOI: http://dx.doi.org/10.1111/j.1548-2456.2016.00301.x
# Author: Matthew C. Ingram
# Affiliation: University at Albany, SUNY
# contact: mingram@albany.edu
# NOTE: this file complements other command files (see replication materials)
# 
####################################################################################

# set working directory (note: this is my filepath; change to your own working directory)
path <- "C:/Users/MI122167/Dropbox/Criminal Procedure/pubs/Ingram2016LAPS/replicationmats"
setwd(path)

# check to see if sub-directories exist; create them if they are not there already
dir.create(paste(path,"/","data", sep=""))
dir.create(paste(path,"/","figures", sep=""))

####################################################################################

# generating 2 random graphs (w1 and w2) for LAPS R&R
# NOTE: ex ante, erdos renyi random graph seems like a better test (May 12, 2015)

# w1: graph using 'sample' function
# randomly generated 32 start nodes and 32 end nodes
node1 <- c(1:32)
node2 <- sample(1:32, 32, replace=TRUE)
graph1 <- cbind(node1, node2)
graph1
plot(graph1)

dim(graph1)

# converting edgelist to adjacency matrix (unweighted)
w1 <- matrix(0, nrow=32, ncol=32)
w1[graph1] <- 1
w1

# checking results with visualizations

plot(graph.adjacency(w1))
plot(graph.adjacency(w1, mode="max"))

png(file="figures/randomgraph-sample.png")
plot(graph.adjacency(w1, mode="max"))
dev.off()

tiff(file="figures/randomgraph-sample.tif", width=6, height=6, units="in", res=300)
plot(graph.adjacency(w1, mode="max"))
dev.off()


# w2: graphs using erdos.renyi
# pr(tie) = 0.125 based on average number of spatial neighbors in actual Mexico shapefile
install.packages("igraph")
library(igraph)
graph2 <- erdos.renyi.game(32, .125, type=c("gnp"), directed=FALSE, loops=FALSE)
plot(graph2)
w2 <- get.adjacency(graph2, type=c("both"))

# w2[w2=.] <- 0
w2
str(w2)

# generate better visualizations

png(file="figures/randomgraph-erdosrenyi.png")
plot(graph2)
dev.off()

tiff(file="figures/randomgraph-erdosrenyi.tif", width=6, height=6, units="in", res=300)
plot(graph2)
dev.off()



# test multiplication on dummy data
y <- as.vector(sample(1:32, 32, replace=TRUE))
y
w1y <- w1%*%y
w1y

w2y <- w2%*%y
w2y

data <- as.matrix(cbind(w1y, w2y))
library(weights)

cor(data)

pairs(~ data[,1] + data[,2], data=data)

# plot correlations #2
library(car)
scatterplotMatrix(~ data[,1] + data[,2], data=data)

##############
"""
generating weighted values of crimpro data
need to:
(1) load data for each year
(2) multiply w by data: W %*% y
(3) add new weighted data to data set
(4) run all analyses with new data

ALSO: see if Moran's I for crimpro changing over time

"""
#############


###############
""" 
5 Dvs in earlier versions were:
add1 (i1)
add1leg (i2)
add1imp (i3)
add2ccp (i4)
add2les (i5)
####
However, only DV examined in published article is i4 (add2ccp)
"""
# alternative: the following code works, as well
# if have needed data, skip to reshaping and extracting data below
i1.02 <- data$add1[data$year==2002] 
i1.03 <- data$add1[data$year==2003] 
i1.04 <- data$add1[data$year==2004] 
#... etc ...
i2 <- data$add1leg
i3 <- data$add1imp
i4 <- data$add2ccp
i5 <- data$add2les

# Reshaping and extracting data from wide data sets
# load wide data sets for each DV
library(foreign)

# i4
# elsewhere, generated wide format of lags where each row identified a state (by code) and 
# each identified the dv for a particular year (e.g., 2002)
# in July 2014, the date of the file used for the published article, this file was
# the following, which is included in replication materials:
data <- read.dta('crimprodata_2014-7jul21_postLASA_add2ccp_wide.dta')
names(data)

# now we generate a vector for each column (vector) of data

i4.02 <- as.vector(data[1:32,"i42002"])
i4.03 <- as.vector(data[1:32,"i42003"])
i4.04 <- as.vector(data[1:32,"i42004"])
i4.05 <- as.vector(data[1:32,"i42005"])
i4.06 <- as.vector(data[1:32,"i42006"])
i4.07 <- as.vector(data[1:32,"i42007"])
i4.08 <- as.vector(data[1:32,"i42008"])
i4.09 <- as.vector(data[1:32,"i42009"])
i4.10 <- as.vector(data[1:32,"i42010"])
i4.11 <- as.vector(data[1:32,"i42011"])

i4.11
i4.11[32]
# shows NA
i4.11[32] <- 4.1756

# and lag these values using the random graphs just generated
w1i4.02 <- w1%*%i4.02
w1i4.03 <- w1%*%i4.03
w1i4.04 <- w1%*%i4.04
w1i4.05 <- w1%*%i4.05
w1i4.06 <- w1%*%i4.06
w1i4.07 <- w1%*%i4.07
w1i4.08 <- w1%*%i4.08
w1i4.09 <- w1%*%i4.09
w1i4.10 <- w1%*%i4.10
w1i4.11 <- w1%*%i4.11

w2i4.02 <- w2%*%i4.02
w2i4.03 <- w2%*%i4.03
w2i4.04 <- w2%*%i4.04
w2i4.05 <- w2%*%i4.05
w2i4.06 <- w2%*%i4.06
w2i4.07 <- w2%*%i4.07
w2i4.08 <- w2%*%i4.08
w2i4.09 <- w2%*%i4.09
w2i4.10 <- w2%*%i4.10
w2i4.11 <- w2%*%i4.11

# bind the vectors into a single data frame
w1i4wide <- as.data.frame(cbind(w1i4.02,w1i4.03,w1i4.04,w1i4.05,w1i4.06,w1i4.07,w1i4.08,w1i4.09,w1i4.10,w1i4.11))
w1i4wide
dim(w1i4wide)

w2i4wide <- (cbind(w2i4.02,w2i4.03,w2i4.04,w2i4.05,w2i4.06,w2i4.07,w2i4.08,w2i4.09,w2i4.10,w2i4.11))
w2i4wide
dim(w2i4wide)
str(w2i4wide)

# convert dgeMatrix to regular matrix and then to data frame
w2i4wide <- as.data.frame(as.matrix(w2i4wide))

# and reshape to tall form
w1i4tall <- reshape(w1i4wide,
                    varying = c("V1", "V2", "V3", "V4", "V5", "V6", "V7", "V8", "V9", "V10"), 
                    v.names = "i4",
                    timevar = "year", 
                    times = c("2002", "2003", "2004", "2005", "2006", "2007", "2008", "2009", "2010", "2011"), 
                    new.row.names = 1:(32*10),
                    direction = "long")

w1i4tall
summary(w1i4tall[,"i4"])

w2i4tall <- reshape(w2i4wide,
                    varying = c("V1", "V2", "V3", "V4", "V5", "V6", "V7", "V8", "V9", "V10"), 
                    v.names = "i4",
                    timevar = "year", 
                    times = c("2002", "2003", "2004", "2005", "2006", "2007", "2008", "2009", "2010", "2011"), 
                    new.row.names = 1:(32*10),
                    direction = "long")

w2i4tall
summary(w2i4tall[,"i4"])


# write out two new DVs to file
library(foreign)
dvs <- data.frame(cbind(i4l.02, i4l.03, i4l.04, i4l.05, i4l.06, i4l.07, i4l.08, i4l.09, i4l.10, i4l.11,
             i4i.02, i4i.03, i4i.04, i4i.05, i4i.06, i4i.07, i4i.08, i4i.09, i4i.10, i4i.11))
dvs
write.dta(dvs, "2newdvs2015.dta")
# spot checked dvs by id and these dvs match those by state-year in master data set

w2i4i.02 <- w2%*%i4i.02
w2i4i.03 <- w2%*%i4i.03
w2i4i.04 <- w2%*%i4i.04
w2i4i.05 <- w2%*%i4i.05
w2i4i.06 <- w2%*%i4i.06
w2i4i.07 <- w2%*%i4i.07
w2i4i.08 <- w2%*%i4i.08
w2i4i.09 <- w2%*%i4i.09
w2i4i.10 <- w2%*%i4i.10
w2i4i.11 <- w2%*%i4i.11

w2i4iwide <- (cbind(w2i4i.02,w2i4i.03,w2i4i.04,w2i4i.05,w2i4i.06,w2i4i.07,w2i4i.08,w2i4i.09,w2i4i.10,w2i4i.11))
w2i4iwide
dim(w2i4iwide)
str(w2i4iwide)
# convert dgeMatrix to regular matrix and then to data frame
w2i4iwide <- as.data.frame(as.matrix(w2i4iwide))

w2i4itall <- reshape(w2i4iwide,
                     varying = c("V1", "V2", "V3", "V4", "V5", "V6", "V7", "V8", "V9", "V10"), 
                     v.names = "i4i",
                     timevar = "year", 
                     times = c("2002", "2003", "2004", "2005", "2006", "2007", "2008", "2009", "2010", "2011"), 
                     new.row.names = 1:(32*10),
                     direction = "long")
w2i4itall
summary(w2i4itall[,"i4i"])



'''
# row bind w1 lags
w1i1 <- rbind(w1i1.02,w1i1.03,w1i1.04,w1i1.05,w1i1.06,w1i1.07,w1i1.08,w1i1.09,w1i1.10,w1i1.11)
w1i2 <- rbind(w1i2.02,w1i2.03,w1i2.04,w1i2.05,w1i2.06,w1i2.07,w1i2.08,w1i2.09,w1i2.10,w1i2.11)
w1i3 <- rbind(w1i3.02,w1i3.03,w1i3.04,w1i3.05,w1i3.06,w1i3.07,w1i3.08,w1i3.09,w1i3.10,w1i3.11)
w1i4 <- rbind(w1i4.02,w1i4.03,w1i4.04,w1i4.05,w1i4.06,w1i4.07,w1i4.08,w1i4.09,w1i4.10,w1i4.11)
w1i5 <- rbind(w1i5.02,w1i5.03,w1i5.04,w1i5.05,w1i5.06,w1i5.07,w1i5.08,w1i5.09,w1i5.10,w1i5.11)
'''

# col bind all w1 and w2 lags
w1lags <- cbind(w1i1tall, w1i2tall["i2"], w1i3tall["i3"], w1i4tall["i4"], w1i5tall["i5"])
w2lags <- cbind(w2i1tall, w2i2tall["i2"], w2i3tall["i3"], w2i4tall["i4"], w2i5tall["i5"])
head(w1lags)
head(w2lags)

# col bind two recent lags of i4l and i4i
w2lags2 <- cbind(w2i4ltall, w2i4itall["i4i"])
head(w2lags2)

'''
# Dont use this library(plyr); it is only temporary and does not actually change variable
# name in data
library(plyr)
names(w1lags)[names(w1lags)=="i1"] <- "w1i1"
names(w1lags)[names(w1lags)=="i2"] <- "w1i2"
names(w1lags)[names(w1lags)=="i3"] <- "w1i3"
names(w1lags)[names(w1lags)=="i4"] <- "w1i4"
names(w1lags)[names(w1lags)=="i5"] <- "w1i5"
'''

names(w2lags)[names(w2lags)=="i1"] <- "w2i1"
names(w2lags)[names(w2lags)=="i2"] <- "w2i2"
names(w2lags)[names(w2lags)=="i3"] <- "w2i3"
names(w2lags)[names(w2lags)=="i4"] <- "w2i4"
names(w2lags)[names(w2lags)=="i5"] <- "w2i5"
names(lags)[names(lags)=="id"] <- "code"

names(w2lags2)[names(w2lags2)=="i4l"] <- "w2i4l"
names(w2lags2)[names(w2lags2)=="i4i"] <- "w2i4i"
names(w2lags2)[names(w2lags2)=="id"] <- "code"

head(w1lags)
head(w2lags)

head(w2lags2)

lags <- cbind(w1lags, w2lags[,c("w2i1","w2i2","w2i3","w2i4","w2i5")])
lags
head(lags)
summary(lags)


library(foreign)
write.dta(lags, "data/randomgraphlags.dta")

write.dta(w2lags2, "data/randomgraphlags2.dta")

# Note:
# the data in the two datasets written to file were subsequently merged with the main 
# data file by state-year, and appear in the full replication data set.

# END 
